Se ha vuelto de interés detectar el sentimiento de los mensajes en Twitter. Para nuestro caso, hemos usado el clasificador propuesto por Elliot Hoffman para el caso especial de la lengua española.
Como la data se está recolectando para el proceso 2018, presentaremos los Tweets en dos bloques: del 15 de julio (fin del Mundial de Fútbol de Rusia) hasta el 31 de agosto, y del 1 de setiembre al 23 de setiembre (un día antes del debate). Los Tweets han sido filtrados, es decir, no aparecen los reTweets que un candidato haya hecho de otro usuario. Asimismo, cada Tweet ha sido depurado, eliminando direcciones web, menciones (@) y palabras con #. Cada sentimiento calculado para un Tweet es acompañado de su aceptación (cantidad de me gusta) y de su promoción o exposición (cantidad de retweets).
from classifier import *
# Single shared classifier instance; the analysis code below scores each
# cleaned tweet with clf.predict(text).
clf = SentimentClassifier()
Los siguientes gráficos resumen las tres variables medidas por cada Tweet:
%matplotlib inline
import matplotlib.pyplot as plt
import json
import pandas as pd
# File-name stems of the tweet dumps to analyse, in presentation order.
# Each stem names a JSON input file; results go to '<stem>_data.csv'.
CANDIDATO_FILES = [
    'renzo_ago',
    'renzo_predeb',
    'rbc_ago',
    'rbc_predeb',
    'urresti_ago',
    'urresti_predeb',
    'lay_ago',
    'EstherCapunay_ago',
    'EstherCapunay_predeb',
    'BeingoleaA_ago',
    'BeingoleaA_predeb',
    'son_ago',
    'son_predeb',
    'JorgeMunozAP_ago',
    'JorgeMunozAP_predeb',
    'jaimesalinas80_ago',
    'jaimesalinas80_predeb',
    'JulioGagoPe_ago',
    'JulioGagoPe_predeb',
    'ENRIQUECORNEJOR_ago',
    'ENRIQUECORNEJOR_predeb',
    'EnriquePorLima_ago',
    'EnriquePorLima_predeb',
    'DitelColumbus_ago',
    'DitelColumbus_predeb',
    'GGG_pe_ago',
    'GGG_pe_predeb',
    'juancarloszurek_ago',
]

# Cleaned tweets that start with one of these phrases are skipped (not scored).
PROMO_PREFIXES = ('Te invito a darte', 'Te invito a ingresar')


def extract_fields(tw):
    """Return ``(id, text, created_at, favorite_count, retweet_count)`` for one tweet.

    The body is read from ``tw['text']``; when that key is absent it falls
    back to ``tw['full_text']``.

    Raises:
        KeyError: if any other required key (or both text keys) is missing.
    """
    try:
        text = tw['text']
    except KeyError:
        # Only a missing 'text' key triggers the fallback; any other error
        # propagates instead of being silently swallowed.
        text = tw['full_text']
    return (tw['id'], text, tw['created_at'],
            tw['favorite_count'], tw['retweet_count'])


def clean_tweet_text(txt):
    """Return *txt* without URLs, @mentions, #hashtags and bare 'RT' tokens.

    The text is split on whitespace, every token that contains 'http',
    starts with '@' or '#', or equals 'RT' is dropped, and the remaining
    tokens are re-joined with single spaces.
    """
    return " ".join(
        word for word in txt.split()
        if 'http' not in word
        and not word.startswith(('@', '#'))
        and word != 'RT'
    )


def build_rows(tweetText, predict):
    """Build the per-tweet rows ``[id, likes, retweets, score, date, text]``.

    Args:
        tweetText: iterable of tuples as produced by ``extract_fields``.
        predict: callable applied to the cleaned text; its return value is
            stored as the score (fourth element of each row).

    Tweets whose raw text starts with 'RT' are skipped, as are tweets whose
    cleaned text starts with one of ``PROMO_PREFIXES``.
    """
    rows = []
    for idt, txt, date, likes, retws in tweetText:
        if txt.startswith('RT'):
            continue
        txt = clean_tweet_text(txt)
        if txt.startswith(PROMO_PREFIXES):
            continue
        rows.append([idt, likes, retws, predict(txt), date, txt])
    return rows


def analyze_candidate(candidatoFile, classifier):
    """Score, export and plot the tweets stored in the JSON file *candidatoFile*.

    Steps: load the JSON list of tweets, build the filtered rows, assemble a
    DataFrame indexed and sorted by date ('fecha'), write it to
    ``candidatoFile + '_data.csv'``, plot Likes / ReTweets / Positividad as
    stacked subplots (with a dashed red line at 0.5 on the Positividad
    subplot) and print the number of tweets kept.

    Args:
        candidatoFile: path of the JSON input; also the stem of the CSV output.
        classifier: object exposing ``predict(text)``.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    # NOTE(review): the dumps are assumed to be UTF-8 (or ASCII-escaped)
    # JSON; reading them with an explicit encoding avoids depending on the
    # platform default.
    with open(candidatoFile, 'r', encoding='utf-8') as fd:
        tweets = json.load(fd)

    tweetText = [extract_fields(tw) for tw in tweets]
    rows = build_rows(tweetText, classifier.predict)

    df = pd.DataFrame(
        rows,
        columns=["idT", "Likes", "ReTweets", "Positividad", "fecha", "txt"],
    )
    df["fecha"] = pd.to_datetime(df["fecha"])
    df.sort_values(by=['fecha'], inplace=True, ascending=True)
    df.set_index('fecha', inplace=True)
    df.to_csv(candidatoFile + '_data.csv', encoding='utf-8')

    # Plotting a frame with no rows raises inside pandas, so only plot when
    # at least one tweet survived the filters.
    if not df.empty:
        axes = df.drop(columns=['txt', 'idT']).plot(
            subplots=True, figsize=(18, 10), grid=True, sharex=True)
        # After dropping 'txt' and 'idT' the third subplot is 'Positividad'.
        axes[2].axhline(y=0.5, color='r', linestyle='--')
        plt.show()

    print("Cantidad de Tweets: ", df.shape[0])
    return df


# Run the same analysis for every candidate file. The guard is true when the
# file is executed as a script (and inside a Jupyter/IPython kernel), but it
# keeps the helpers importable without triggering any file I/O.
if __name__ == "__main__":
    for candidatoFile in CANDIDATO_FILES:
        df = analyze_candidate(candidatoFile, clf)
candidatoFile='juancarloszurek_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='kikeocrospoma_ago'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='kikeocrospoma_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='mikausape_ago'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='mikausape_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='ManuelVelardeD_ago'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='ManuelVelardeD_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='JoseLuisGil1000_ago'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='JoseLuisGil1000_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='GomezBacaxLima_ago'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])
candidatoFile='GomezBacaxLima_predeb'
with open(candidatoFile, 'r') as fd:
tweets=json.load(fd)
tweetText=[]
for tw in tweets:
try:
tweetText.append((tw['id'],tw['text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
except:
tweetText.append((tw['id'],tw['full_text'],tw['created_at'],tw['favorite_count'],tw['retweet_count']))
rows=[]
for idt,txt,date,likes,retws in tweetText:
if not txt.startswith('RT'):
txt = " ".join([word for word in txt.split()
if 'http' not in word
and not word.startswith('@')
and not word.startswith('#')
and word != 'RT'
])
if not txt.startswith(('Te invito a darte','Te invito a ingresar')):
senti=clf.predict(txt)
rows.append([idt,likes,retws,senti,date,txt])
df=pd.DataFrame(rows,columns=["idT","Likes","ReTweets","Positividad","fecha","txt"])
df["fecha"]=pd.to_datetime(df["fecha"])
df.sort_values(by=['fecha'],inplace=True,ascending=True)
df.set_index('fecha',inplace=True)
df.to_csv(candidatoFile+'_data.csv',encoding='utf-8')
axes=df.drop(columns=['txt','idT']).plot(subplots=True,figsize=(18, 10),grid=True,sharex=True)
axes[2].axhline(y=0.5, color='r', linestyle='--')
plt.show()
print ("Cantidad de Tweets: ",df.shape[0])